R Session Processing
Load clinical data
# Open a connection to the cBioPortal web service through cgdsr. The resulting
# `cgds` object is reused by the later chunks that download the clinical table
# again.
# NOTE(review): this previously pointed at the legacy plain-HTTP
# "http://www.cbioportal.org/public-portal/" path. The portal root over HTTPS
# is what the cgdsr documentation uses -- confirm it is reachable from your
# environment.
cgds <- cgdsr::CGDS("https://www.cbioportal.org/")
# Uncomment to list the studies exposed by the portal:
# Studies <- cgdsr::getCancerStudies(cgds)

# Clinical attributes for the "gbm_tcga_pub_all" case list.
clinicalData <- cgdsr::getClinicalData(cgds, "gbm_tcga_pub_all")
# Offline alternative: read a local export instead of querying the portal.
# clinicalData <- read.csv("Clinical_tab.csv") #, na.strings=c("","NA")
Survival plot
# Survival curves for overall survival (OS_MONTHS / OS_STATUS), stratified by
# DFS_STATUS.
# Surv() is referenced unqualified inside the formula, so the survival package
# has to be attached for it to resolve.
# NOTE(review): Surv() expects a numeric or logical event indicator; this
# assumes OS_STATUS has already been recoded to 0/1 and blank DFS_STATUS values
# have been filled in before this chunk runs -- confirm.
fit <- survival::survfit(
  Surv(OS_MONTHS, OS_STATUS) ~ DFS_STATUS,
  data = clinicalData
)
survminer::ggsurvplot(
  fit,
  data = clinicalData,
  type = "kaplan-meier",
  # conf.type = "log",
  conf.int = TRUE,                      # draw confidence bands
  pval = TRUE,                          # annotate the plot with a p-value
  fun = "pct",                          # y axis as survival percentage
  risk.table = TRUE,                    # add the number-at-risk table
  size = 1,
  linetype = "strata",                  # one line type per stratum
  palette = c("#E7B800", "#2E9FDF"),
  legend = "top",
  # Spelled `legend.title`: the previous `lengend.title` did not match any
  # ggsurvplot() argument, so the legend title was never applied.
  legend.title = "DFS_STATUS",
  legend.labs = c("DiseaseFree", "Recurred")
)


R Session: Plot DiseaseFree vs Recurred during OS_MONTHS
# In-memory (plain R session) variant of the benchmark: recode the status
# columns, order patients by OS_MONTHS, and plot the running counts of
# "DiseaseFree" rows and of all other rows against OS_MONTHS.
# The clinical table is downloaded again so the recoding starts from the raw
# values; the download itself happens before the timer starts.
clinicalData <- cgdsr::getClinicalData(cgds, "gbm_tcga_pub_all")
start_time <- Sys.time()
clinicalData %>%
  mutate(OS_STATUS = gsub("LIVING", "0", OS_STATUS)) %>%
  mutate(OS_STATUS = gsub("DECEASED", "1", OS_STATUS)) %>%
  # Empty or single-space DFS_STATUS entries are counted as "DiseaseFree".
  mutate(DFS_STATUS = gsub("^$|^ $", "DiseaseFree", DFS_STATUS)) %>%
  mutate(OS_STATUS = as.numeric(OS_STATUS)) %>%
  arrange(OS_MONTHS) %>%
  mutate(DiseaseFree = ifelse(DFS_STATUS == "DiseaseFree", 1, 0)) %>%
  as.data.frame() %>%
  # Running totals along the OS_MONTHS ordering; n_Recurred counts every row
  # whose DFS_STATUS is not "DiseaseFree".
  mutate(
    n_DiseaseFree = cumsum(DiseaseFree == 1),
    n_Recurred = cumsum(DiseaseFree == 0)
  ) %>%
  # Only x is shared between the layers. The plot-level `y = value,
  # color = variable` mapping was dropped: those columns are never created,
  # and each layer supplies its own y and colour.
  ggplot(aes(x = OS_MONTHS)) +
  geom_point(aes(y = n_DiseaseFree, colour = "n_DiseaseFree")) +
  geom_point(aes(y = n_Recurred, colour = "n_Recurred")) +
  labs(
    # The title is evaluated after the pipeline on the left of `+`, so the
    # elapsed time covers the data preparation but not the drawing. The unit
    # is fixed to seconds and printed, so runs can be compared directly.
    title = paste(
      "Using R Session, Running time = ",
      format(round(difftime(Sys.time(), start_time, units = "secs"), 3))
    ),
    y = "Cumulative number of patients",
    colour = "Group"
  )

Spark Node: Plot DiseaseFree vs Recurred during OS_MONTHS
# Download the clinical table for the "gbm_tcga_pub_all" case list again
# (presumably so the Spark run starts from the raw, un-recoded values).
clinicalData <- cgdsr::getClinicalData(cgds, "gbm_tcga_pub_all")

# Connect to a local Spark 2.4.0 instance; `sc` is the connection handle used
# when the table is copied to Spark.
sc <- sparklyr::spark_connect(
  master = "local",
  version = "2.4.0"
)
Re-using existing Spark connection to local
# Spark variant of the benchmark: the same recoding and ordering as the R
# session version, expressed on a Spark table, followed by the same
# cumulative-count plot. The copy to Spark happens before the timer starts.
clinicalData_tbl <- dplyr::copy_to(sc, clinicalData, overwrite = TRUE)
start_time <- Sys.time()
clinicalData_tbl %>%
  mutate(OS_STATUS = regexp_replace(OS_STATUS, "LIVING", "0")) %>%
  mutate(OS_STATUS = regexp_replace(OS_STATUS, "DECEASED", "1")) %>%
  # Empty or single-space DFS_STATUS entries are counted as "DiseaseFree".
  mutate(DFS_STATUS = regexp_replace(DFS_STATUS, "^$|^ $", "DiseaseFree")) %>%
  mutate(OS_STATUS = as.numeric(OS_STATUS)) %>%
  # Earlier attempts at handling missing values, kept for reference:
  # mutate(OS_STATUS = regexp_replace(as.numeric(OS_STATUS), 'NaN', NA)) %>%
  # mutate(OS_STATUS = regexp_replace(OS_STATUS, NaN, NA)) %>%
  # na.replace('') %>% ## not good for OS_STATUS (0,1)
  # dplyr::filter(!is.na(OS_MONTHS))
  # Sort on the missing-flag first so rows without OS_MONTHS end up last.
  arrange(is.na(OS_MONTHS), OS_MONTHS) %>%
  mutate(DiseaseFree = ifelse(DFS_STATUS == "DiseaseFree", 1, 0)) %>%
  # Bring the Spark result into a local data.frame; the cumulative sums below
  # are computed in R on the already-ordered rows.
  as.data.frame() %>%
  # n_Recurred counts every row whose DFS_STATUS is not "DiseaseFree".
  mutate(
    n_DiseaseFree = cumsum(as.numeric(DiseaseFree == 1)),
    n_Recurred = cumsum(as.numeric(DiseaseFree == 0))
  ) %>%
  # Only x is shared between the layers. The plot-level `y = value,
  # color = variable` mapping was dropped: those columns are never created,
  # and each layer supplies its own y and colour.
  ggplot(aes(x = OS_MONTHS)) +
  geom_point(aes(y = n_DiseaseFree, colour = "n_DiseaseFree")) +
  geom_point(aes(y = n_Recurred, colour = "n_Recurred")) +
  labs(
    # The title is evaluated after the pipeline on the left of `+`, so the
    # elapsed time covers the Spark query and the transfer to R but not the
    # drawing. The unit is fixed to seconds and printed, so runs can be
    # compared directly.
    title = paste(
      "Using Spark Node, Running time = ",
      format(round(difftime(Sys.time(), start_time, units = "secs"), 3))
    ),
    y = "Cumulative number of patients",
    colour = "Group"
  )

LS0tCnRpdGxlOiAic3Vydml2YWwgcGxvdCBzZXNzaW9uIFZTIHNwYXJrIG5vZGUiCmRhdGU6ICdgciBTeXMuRGF0ZSgpYCcKb3V0cHV0OiAKICAgIGh0bWxfZG9jdW1lbnQ6CiAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKICAgIGZpZ19jYXB0aW9uOiB0cnVlCiAgICB0b2M6IHRydWUKICAgIGZpZ193aWR0aDogNwogICAgZmlnX2hlaWdodDogNi41CiAgICB0aGVtZTogY29zbW8KICAgIGhpZ2hsaWdodDogdGFuZ28KICAgIGNvZGVfZm9sZGluZzogaGlkZQotLS0KCiMjIGxvYWQgbGlicmFyaWVzCmBgYHtyfQpsaWJyYXJ5KHN1cnZpdmFsKQpsaWJyYXJ5KHN1cnZtaW5lcikKbGlicmFyeShjZ2RzcikKbGlicmFyeShzcGFya2x5cikKbGlicmFyeShkcGx5cikKYGBgCgojICBSIFNlc3Npb24gUHJvY2Vzc2luZwoKIyMgTG9hZCBjbGluaWNhbCBEYXRhCmBgYHtyfQpjZ2RzIDwtIGNnZHNyOjpDR0RTKCJodHRwOi8vd3d3LmNiaW9wb3J0YWwub3JnL3B1YmxpYy1wb3J0YWwvIikKI1N0dWRpZXM8LSBjZ2Rzcjo6Z2V0Q2FuY2VyU3R1ZGllcyhjZ2RzKQpjbGluaWNhbERhdGEgPC0gY2dkc3I6OmdldENsaW5pY2FsRGF0YShjZ2RzLCAiZ2JtX3RjZ2FfcHViX2FsbCIpCgojY2xpbmljYWxEYXRhIDwtIHJlYWQuY3N2KCJDbGluaWNhbF90YWIuY3N2IikgIywgbmEuc3RyaW5ncz1jKCIiLCJOQSIpCgoKCmBgYAoKCiMjIFRyYW5zZm9ybWF0aW9ucyAxCgpgYGB7cn0KY2xpbmljYWxEYXRhJE9TX1NUQVRVUyA8LSBnc3ViKCJMSVZJTkciLCAiMCIsIGNsaW5pY2FsRGF0YSRPU19TVEFUVVMsIGlnbm9yZS5jYXNlID0gVFJVRSkKY2xpbmljYWxEYXRhJE9TX1NUQVRVUyA8LSBnc3ViKCJERUNFQVNFRCIsICIxIiwgY2xpbmljYWxEYXRhJE9TX1NUQVRVUywgaWdub3JlLmNhc2UgPSBUUlVFKQpjbGluaWNhbERhdGEkREZTX1NUQVRVUyA8LSBnc3ViKCJeJHxeICQiLCAiRGlzZWFzZUZyZWUiLCBjbGluaWNhbERhdGEkREZTX1NUQVRVUywgaWdub3JlLmNhc2UgPSBUUlVFKQpjbGluaWNhbERhdGEkT1NfU1RBVFVTIDwtIGFzLm51bWVyaWMoY2xpbmljYWxEYXRhJE9TX1NUQVRVUykKCmBgYAoKIyMgc3Vydml2YWwgcGxvdAoKYGBge3J9CmZpdCA8LSBzdXJ2aXZhbDo6c3VydmZpdChTdXJ2KE9TX01PTlRIUywgT1NfU1RBVFVTKSB+IERGU19TVEFUVVMsIGRhdGEgPSBjbGluaWNhbERhdGEpCiAgIHN1cnZtaW5lcjo6Z2dzdXJ2cGxvdChmaXQsIGRhdGEgPSBjbGluaWNhbERhdGEsCiAgICAgICAgICAgICAgICAgICAgICAgICAgdHlwZSA9ICJrYXBsYW4tbWVpZXIiLAogICAgICAgICAgICAgICAgICAgICAgICAgICNjb25mLnR5cGU9ImxvZyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgY29uZi5pbnQgPSBUUlVFLAogICAgICAgICAgICAgICAgICAgICAgICAgIHB2YWwgPSBUUlVFLAogICAgICAgICAgICAgICAgICAgICAgICAgIGZ1biA9ICJwY3QiLAogICAgICAgICAgICAgICAgICAgICAgICAgIHJpc2sudGFibGUgPSBUUlVFLAogICAgICAgICAgICAg
ICAgICAgICAgICAgIHNpemUgPSAxLAogICAgICAgICAgICAgICAgICAgICAgICAgIGxpbmV0eXBlID0gInN0cmF0YSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgcGFsZXR0ZSA9IGMoIiNFN0I4MDAiLCAiIzJFOUZERiIpLAogICAgICAgICAgICAgICAgICAgICAgICAgIGxlZ2VuZCA9ICJ0b3AiLAogICAgICAgICAgICAgICAgICAgICAgICAgIGxlbmdlbmQudGl0bGUgPSAiREZTX1NUQVRVUyIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgbGVnZW5kLmxhYnMgPSBjKCJEaXNlYXNlRnJlZSIsICJSZWN1cnJlZCIpCiAgICkKYGBgCgoKIyMgUiBTZXNzaW9uOiBQbG90IERpc2Vhc2VGcmVlIHZzIFJlY2N1cmVkIGR1cmluZyBPU19NT05USFMKYGBge3J9CiAgY2xpbmljYWxEYXRhIDwtIGNnZHNyOjpnZXRDbGluaWNhbERhdGEoY2dkcywgImdibV90Y2dhX3B1Yl9hbGwiKQpzdGFydF90aW1lIDwtIFN5cy50aW1lKCkKICBjbGluaWNhbERhdGEgJT4lIAogIG11dGF0ZShPU19TVEFUVVMgPSBnc3ViKCJMSVZJTkciLCAiMCIsIE9TX1NUQVRVUykpICU+JQogIG11dGF0ZShPU19TVEFUVVMgPSBnc3ViKCAiREVDRUFTRUQiLCAiMSIsIE9TX1NUQVRVUykpICU+JQogIG11dGF0ZShERlNfU1RBVFVTID0gZ3N1YiggIl4kfF4gJCIsICJEaXNlYXNlRnJlZSIsIERGU19TVEFUVVMpKSAlPiUKICBtdXRhdGUoT1NfU1RBVFVTID0gYXMubnVtZXJpYyhPU19TVEFUVVMpKSAlPiUKICBhcnJhbmdlKE9TX01PTlRIUykgJT4lCiAgbXV0YXRlKCBEaXNlYXNlRnJlZSA9IGlmZWxzZShERlNfU1RBVFVTID09ICJEaXNlYXNlRnJlZSIsIDEsIDApKSAlPiUgCiAgYXMuZGF0YS5mcmFtZSgpICU+JQogIG11dGF0ZShuX0Rpc2Vhc2VGcmVlID0gY3Vtc3VtKERpc2Vhc2VGcmVlID09IDEpKSAlPiUKICBtdXRhdGUobl9SZWN1cnJlZCA9IGN1bXN1bShEaXNlYXNlRnJlZSA9PSAwKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gT1NfTU9OVEhTLCB5ID0gdmFsdWUsIGNvbG9yID0gdmFyaWFibGUpKSArCiAgZ2VvbV9wb2ludChhZXMoeSA9IG5fRGlzZWFzZUZyZWUsIGNvbCA9ICJuX0Rpc2Vhc2VGcmVlIikpICsKICBnZW9tX3BvaW50KGFlcyh5ID0gbl9SZWN1cnJlZCwgY29sID0gIm5fUmVjdXJyZWQiKSkgKwogIGxhYnModGl0bGUgPSBwYXN0ZSgiVXNpbmcgUiBTZXNzaW9uLCBSdW5uaW5nIHRpbWUgPSAiLCBTeXMudGltZSgpIC0gc3RhcnRfdGltZSkpCmBgYAoKIyMgU3BhcmsgTm9kZTogUGxvdCBEaXNlYXNlRnJlZSB2cyBSZWNjdXJlZCBkdXJpbmcgT1NfTU9OVEhTIApgYGB7cn0KIGNsaW5pY2FsRGF0YSA8LSBjZ2Rzcjo6Z2V0Q2xpbmljYWxEYXRhKGNnZHMsICJnYm1fdGNnYV9wdWJfYWxsIikKIHNjIDwtIHNwYXJrX2Nvbm5lY3QobWFzdGVyID0gImxvY2FsIiwKICAgICAgICAgICAgICAgICAgICAgdmVyc2lvbiA9ICIyLjQuMCIpCgogY2xpbmljYWxEYXRhX3RibCA8LSBkcGx5cjo6Y29weV90byhzYywgY2xpbmljYWxEYXRhLCBvdmVyd3JpdGUgPSBUUlVF
KQogIHN0YXJ0X3RpbWUgPC0gU3lzLnRpbWUoKQogIGNsaW5pY2FsRGF0YV90YmwgJT4lCiAgbXV0YXRlKE9TX1NUQVRVUyA9IHJlZ2V4cF9yZXBsYWNlKE9TX1NUQVRVUywgIkxJVklORyIsICIwIikpICU+JQogIG11dGF0ZShPU19TVEFUVVMgPSByZWdleHBfcmVwbGFjZShPU19TVEFUVVMsICJERUNFQVNFRCIsICIxIikpICU+JQogIG11dGF0ZShERlNfU1RBVFVTID0gcmVnZXhwX3JlcGxhY2UoREZTX1NUQVRVUywgIl4kfF4gJCIsICJEaXNlYXNlRnJlZSIpKSAlPiUKICBtdXRhdGUoT1NfU1RBVFVTID0gYXMubnVtZXJpYyhPU19TVEFUVVMpKSAlPiUKICAjbXV0YXRlKE9TX1NUQVRVUyA9IHJlZ2V4cF9yZXBsYWNlKGFzLm51bWVyaWMoT1NfU1RBVFVTKSwgJ05hTicsIE5BKSkgJT4lCiAgI211dGF0ZShPU19TVEFUVVMgPSByZWdleHBfcmVwbGFjZShPU19TVEFUVVMsIE5hTiwgTkEpKSAlPiUKICAjbmEucmVwbGFjZSgnJykgJT4lICAjIyBub3QgZ29vZCBmb3IgT1NfU1RBVFVTICgwLDEpCiAgI2RwbHlyOjpmaWx0ZXIoIWlzLm5hKE9TX01PTlRIUykpIAogIGFycmFuZ2UoaXMubmEoT1NfTU9OVEhTKSwgT1NfTU9OVEhTKSAlPiUgICMjIE9VRkZGIHB1dCBOYW4gYXQgdGhlIGVuZCBvZiB0aGUgY29sdW1uCiAgbXV0YXRlKERpc2Vhc2VGcmVlID0gaWZlbHNlKERGU19TVEFUVVMgPT0gIkRpc2Vhc2VGcmVlIiwgMSwgMCkpICU+JSAKICBhcy5kYXRhLmZyYW1lKCkgJT4lCiAgbXV0YXRlKCBuX0Rpc2Vhc2VGcmVlID0gY3Vtc3VtKGFzLm51bWVyaWMoRGlzZWFzZUZyZWUgPT0gMSApKSkgJT4lCiAgbXV0YXRlKCBuX1JlY3VycmVkID0gY3Vtc3VtKGFzLm51bWVyaWMoRGlzZWFzZUZyZWUgPT0gMCApKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gT1NfTU9OVEhTLCB5ID0gdmFsdWUsIGNvbG9yID0gdmFyaWFibGUpKSArCiAgZ2VvbV9wb2ludChhZXMoeSA9IG5fRGlzZWFzZUZyZWUsIGNvbCA9ICJuX0Rpc2Vhc2VGcmVlIikpICsKICBnZW9tX3BvaW50KGFlcyh5ID0gbl9SZWN1cnJlZCwgY29sID0gIm5fUmVjdXJyZWQiKSkgICsKICAgbGFicyh0aXRsZSA9IHBhc3RlKCJVc2luZyBTcGFyayBOb2RlLCBSdW5uaW5nIHRpbWUgPSAiLCBTeXMudGltZSgpIC0gc3RhcnRfdGltZSkpCmBgYAoK